Time Until Large Events

import pandas as pd
import numpy as np
import datetime as dt
# Load the formatted ETAS output into `etas`.
# NOTE(review): lineterminator='\n' leaves a trailing '\r' on the last
# column's header (and on its string values) when the file uses CRLF line
# endings -- confirm whether that is intended before relying on that column.
csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep=',', lineterminator='\n')

# Load the 1960-2023 catalogue into `usgs`.
# 'time' is kept as a string so it can be parsed explicitly later.
# low_memory=False makes pandas infer each column's dtype from the whole
# file instead of chunk by chunk, which avoids the mixed-type DtypeWarning.
csv_file = "../datasets/All (1960-2023).csv"
usgs = pd.read_csv(
    csv_file,
    sep=',',
    lineterminator='\n',
    dtype={'time': str},
    low_memory=False,
)
Hide code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_6948\2239509985.py:4: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
  usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})

Data Filtering

  1. Converting the date columns to datetime

  2. Date > 1960-01-01 and < 2023-01-01

  3. Longitude > -123 and < -113

  4. Latitude > 29 and < 39

Hide code cell source
# Parse the two-digit-year dates; anything that does not match becomes NaT.
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")

# "%y" can place a two-digit year in the wrong century: any date that lands
# after the current year is moved back by 100 years.
in_future = etas["Date"].dt.year > pd.Timestamp.now().year
etas.loc[in_future, "Date"] = etas.loc[in_future, "Date"] - pd.DateOffset(years=100)

# Keep events strictly inside the study period (both bounds exclusive) ...
start, end = pd.to_datetime('1960-01-01'), pd.to_datetime('2023-01-01')
within_dates = (etas['Date'] > start) & (etas['Date'] < end)

# ... and strictly inside the box -123 < X < -113, 29 < Y < 39.
within_box = (
    (etas['X'] > -123) & (etas['X'] < -113)
    & (etas['Y'] < 39) & (etas['Y'] > 29)
)

etas = etas[within_dates & within_box]
etas.head()
Date Time Year X Y Magnitude Z\r
1 1960-01-02 0:08:49.00 1960.006125 -115.6222 33.0793 4.25 7.9322
2 1960-01-02 0:10:31.00 1960.007305 -115.6323 33.1220 3.03 8.4015
3 1960-01-02 0:10:32.00 1960.007320 -115.5851 33.0745 3.03 7.9678
4 1960-01-02 0:11:07.00 1960.007720 -115.6256 33.0290 3.08 7.9737
5 1960-01-02 0:11:17.00 1960.007840 -115.6050 33.0276 3.61 7.9322
Hide code cell source
# Parse the raw timestamp strings once; unparseable values become NaT.
event_time = pd.to_datetime(usgs["time"], errors="coerce")
if event_time.dt.tz is not None:
    # Drop the timezone but keep the wall-clock value, so the calendar date
    # is the same one a "%Y-%m-%d" string of the parsed value would show.
    event_time = event_time.dt.tz_localize(None)

# Keep only the calendar date (midnight timestamps) and discard the raw column.
usgs["Date"] = event_time.dt.normalize()
usgs = usgs.drop(columns=["time"])

# Coerce the columns used below to numbers; non-numeric entries become NaN
# and are therefore rejected by the comparisons in the mask.
for column in ('longitude', 'latitude', 'mag'):
    usgs[column] = pd.to_numeric(usgs[column], errors='coerce')

# Single mask: study period (bounds exclusive) and the box
# -123 < longitude < -113, 29 < latitude < 39.
start, end = pd.Timestamp('1960-01-01'), pd.Timestamp('2023-01-01')
keep = (
    (usgs['Date'] > start) & (usgs['Date'] < end)
    & (usgs['longitude'] > -123) & (usgs['longitude'] < -113)
    & (usgs['latitude'] < 39) & (usgs['latitude'] > 29)
)

# .copy() gives an independent frame, so later column assignments do not
# raise SettingWithCopyWarning.
usgs = usgs[keep].copy()
usgs.head()
latitude longitude depth mag magType nst gap dmin rms net ... place type horizontalError depthError magError magNst status locationSource magSource\r Date
240 33.397500 -116.393333 3.88 4.14 mw 132 16 0.07391 0.19 ci ... 16 km N of Borrego Springs, CA earthquake 0.1 0.38 NaN 6 reviewed ci ci\r 2022-12-31
241 34.355667 -116.921833 4.73 3.47 mw 121 25 0.07845 0.15 ci ... 11km SSE of Lucerne Valley, CA earthquake 0.09 0.41 NaN 4 reviewed ci ci\r 2022-12-31
246 37.620167 -122.025000 3.82 3.34 mw 141 16 NaN 0.16 nc ... 3km N of Union City, CA earthquake 0.1 0.17 NaN 3 reviewed nc nc\r 2022-12-22
262 37.918167 -122.304000 5.48 3.57 mw 170 19 0.01598 0.15 nc ... 1km ENE of El Cerrito, CA earthquake 0.1 0.17 NaN 4 reviewed nc nc\r 2022-12-17
263 36.604667 -121.209333 8.88 3.28 ml 67 55 0.03812 0.09 nc ... 10km NW of Pinnacles, CA earthquake 0.14 0.28 0.129 72 reviewed nc nc\r 2022-12-13

5 rows × 22 columns

Data Grouping And Merging

Events are grouped by calendar day, and each day is represented by the maximum magnitude recorded on that day.

# One row per calendar day in the ETAS data, holding that day's largest
# magnitude. Days without any event do not appear.
etas_day = etas['Date'].dt.to_period('D')
max_mag_etas = etas.groupby(etas_day)['Magnitude'].max().reset_index()
max_mag_etas.head()
Date Magnitude
0 1960-01-02 4.25
1 1960-01-03 3.90
2 1960-01-04 4.24
3 1960-01-05 3.40
4 1960-01-06 3.47
# One row per calendar day in the USGS data, holding that day's largest
# magnitude. Days without any event do not appear.
usgs_day = usgs['Date'].dt.to_period('D')
max_mag_usgs = usgs.groupby(usgs_day)['mag'].max().reset_index()
max_mag_usgs.head()
Date mag
0 1960-01-02 4.04
1 1960-01-05 3.03
2 1960-01-07 3.64
3 1960-01-08 3.10
4 1960-01-11 3.79
# Magnitude threshold for a "large" event. The labelling code compares the
# daily maximum magnitude with a strict ">", so a day at exactly 6 is not large.
large_earthquake = 6

Large Events

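Each day is labelled as a Large Event (1) when its maximum magnitude exceeds the `large_earthquake` threshold, and 0 otherwise. For Large Event days, `time_diff` holds the number of days since the previous Large Event; it is NaN for all other days and for the first Large Event.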

# Label each ETAS day and measure the gap between consecutive large events.
large_mag_etas = max_mag_etas.copy()

# 1 when the day's maximum magnitude exceeds the threshold, else 0.
large_mag_etas["Large Event"] = (large_mag_etas["Magnitude"] > large_earthquake).astype(int)

# Daily periods -> timestamps so that date arithmetic and plotting work.
large_mag_etas["Date"] = large_mag_etas["Date"].dt.to_timestamp()

# Days since the previous large event. diff() runs over the large-event rows
# only; index alignment on assignment leaves every other row as NaN, and the
# first large event is NaN because it has no predecessor. Row 0 is therefore
# always NaN already, so no explicit reset is needed (the former chained
# `['time_diff'].iloc[0] = pd.NA` only produced a SettingWithCopyWarning).
large_mag_etas['time_diff'] = large_mag_etas.loc[large_mag_etas['Large Event'] == 1, 'Date'].diff().dt.days
Hide code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_6948\609931718.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  large_mag_etas['time_diff'].iloc[0] = pd.NA
# Preview the first rows of the labelled ETAS table.
large_mag_etas.head()
Date Magnitude Large Event time_diff
0 1960-01-02 4.25 0 NaN
1 1960-01-03 3.90 0 NaN
2 1960-01-04 4.24 0 NaN
3 1960-01-05 3.40 0 NaN
4 1960-01-06 3.47 0 NaN
# Label each USGS day and measure the gap between consecutive large events.
large_mag_usgs = max_mag_usgs.copy()

# 1 when the day's maximum magnitude exceeds the threshold, else 0.
large_mag_usgs["Large Event"] = (large_mag_usgs["mag"] > large_earthquake).astype(int)

# Daily periods -> timestamps so that date arithmetic and plotting work.
large_mag_usgs["Date"] = large_mag_usgs["Date"].dt.to_timestamp()

# Days since the previous large event. diff() runs over the large-event rows
# only; index alignment on assignment leaves every other row as NaN, and the
# first large event is NaN because it has no predecessor. Row 0 is therefore
# always NaN already, so no explicit reset is needed (the former chained
# `['time_diff'].iloc[0] = pd.NA` only produced a SettingWithCopyWarning).
large_mag_usgs['time_diff'] = large_mag_usgs.loc[large_mag_usgs['Large Event'] == 1, 'Date'].diff().dt.days
Hide code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_6948\50271692.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  large_mag_usgs['time_diff'].iloc[0] = pd.NA
# Preview the first rows of the labelled USGS table.
large_mag_usgs.head()
Date mag Large Event time_diff
0 1960-01-02 4.04 0 NaN
1 1960-01-05 3.03 0 NaN
2 1960-01-07 3.64 0 NaN
3 1960-01-08 3.10 0 NaN
4 1960-01-11 3.79 0 NaN

Graphing Time Until Large Events

import plotly.express as px
import plotly.graph_objects as go
Hide code cell source
# Bar chart of `time_diff` against date for the ETAS table.
etas_bars = go.Bar(
    x=large_mag_etas['Date'],
    y=large_mag_etas['time_diff'],
    # Thin black outline around each bar; the fill colour is left at the
    # plotly default (no marker colour is set for this chart).
    marker=dict(line=dict(color='black', width=1)),
)
fig = go.Figure(data=[etas_bars])

# Title and axis labels.
fig.update_layout(
    title='Time Difference Bar Chart (ETAS)',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)

# Render the figure.
fig.show()
Hide code cell source
# Bar chart of `time_diff` against date for the USGS table.
usgs_bars = go.Bar(
    x=large_mag_usgs['Date'],
    y=large_mag_usgs['time_diff'],
    # Red bars with a thin black outline.
    marker=dict(color='red', line=dict(color='black', width=1)),
)
fig = go.Figure(data=[usgs_bars])

# Title and axis labels.
fig.update_layout(
    title='Time Difference Bar Chart (USGS)',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)

# Render the figure.
fig.show()